GPX raw data import results¶

This notebook presents a resume of the GPX loaded data.

In [ ]:
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import gpxpy
import gpxpy.gpx
from fiona.crs import from_epsg
from keplergl import KeplerGl
import geopandas as gpd
import movingpandas as mpd
import uuid
import hvplot.pandas
from pyproj import CRS
In [ ]:
# Injected parameters
from dagster import seven as __dm_seven
import dagstermill as __dm_dagstermill
context = __dm_dagstermill._reconstitute_job_context(
    **{
        key: __dm_seven.json.loads(value)
        for key, value
        in {'executable_dict': '{"__class__": "ReconstructablePipeline", "asset_selection": {"__frozenset__": [{"__class__": "AssetKey", "path": ["MG91_20230428_artefacto", "raw_explore", "MG91_artefacto_reloj_20230428_01_explore"]}, {"__class__": "AssetKey", "path": ["MG91_20230428_artefacto", "raw_explore", "MG91_artefacto_reloj_20230428_02_explore"]}, {"__class__": "AssetKey", "path": ["MG91_20230428_artefacto_traj_db"]}, {"__class__": "AssetKey", "path": ["MG91_20230428_artefacto_traj_db_track"]}, {"__class__": "AssetKey", "path": ["MG91_artefacto_reloj_20230428_01"]}, {"__class__": "AssetKey", "path": ["MG91_artefacto_reloj_20230428_01_traj_smooth_db"]}, {"__class__": "AssetKey", "path": ["MG91_artefacto_reloj_20230428_02"]}, {"__class__": "AssetKey", "path": ["MG91_artefacto_reloj_20230428_02_traj_smooth_db"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91", "MG91_20230428_artefacto", "gpx", "MG91_artefacto_reloj_20230428_01_gpx"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91", "MG91_20230428_artefacto", "gpx", "MG91_artefacto_reloj_20230428_02_gpx"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_20230428_artefacto_traj"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_01_traj"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_01_traj_clean"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_01_traj_smooth"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_02_traj"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_02_traj_clean"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_02_traj_smooth"]}]}, "pipeline_name": "__ASSET_JOB", "repository": {"__class__": "ReconstructableRepository", "container_context": null, "container_image": null, "entry_point": ["dagster"], "executable_path": "/home/jag/anaconda3/envs/llacta-rumbos/bin/python3.10", "pointer": {"__class__": "ModuleCodePointer", "fn_name": "defs", "module": "tutorial_project", "working_directory": "/home/jag/dev/llactalab/rumbos/gps-duck/tutorial-project"}, "repository_load_data": null}, "solid_selection_str": null}', 'job_run_dict': '{"__class__": "PipelineRun", "asset_selection": {"__frozenset__": [{"__class__": "AssetKey", "path": ["MG91_20230428_artefacto", "raw_explore", "MG91_artefacto_reloj_20230428_01_explore"]}, {"__class__": "AssetKey", "path": ["MG91_20230428_artefacto", "raw_explore", "MG91_artefacto_reloj_20230428_02_explore"]}, {"__class__": "AssetKey", "path": ["MG91_20230428_artefacto_traj_db"]}, {"__class__": "AssetKey", "path": ["MG91_20230428_artefacto_traj_db_track"]}, {"__class__": "AssetKey", "path": ["MG91_artefacto_reloj_20230428_01"]}, {"__class__": "AssetKey", "path": ["MG91_artefacto_reloj_20230428_01_traj_smooth_db"]}, {"__class__": "AssetKey", "path": ["MG91_artefacto_reloj_20230428_02"]}, {"__class__": "AssetKey", "path": ["MG91_artefacto_reloj_20230428_02_traj_smooth_db"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91", "MG91_20230428_artefacto", "gpx", "MG91_artefacto_reloj_20230428_01_gpx"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91", "MG91_20230428_artefacto", "gpx", "MG91_artefacto_reloj_20230428_02_gpx"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_20230428_artefacto_traj"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_01_traj"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_01_traj_clean"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_01_traj_smooth"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_02_traj"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_02_traj_clean"]}, {"__class__": "AssetKey", "path": ["workdir", "MG91_artefacto_reloj_20230428_02_traj_smooth"]}]}, "execution_plan_snapshot_id": "53d72acd7cbb9973aaedde948f0d1dcfe094cf59", "external_pipeline_origin": {"__class__": "ExternalPipelineOrigin", "external_repository_origin": {"__class__": "ExternalRepositoryOrigin", "repository_location_origin": {"__class__": "ManagedGrpcPythonEnvRepositoryLocationOrigin", "loadable_target_origin": {"__class__": "LoadableTargetOrigin", "attribute": null, "executable_path": null, "module_name": "tutorial_project", "package_name": null, "python_file": null, "working_directory": "/home/jag/dev/llactalab/rumbos/gps-duck/tutorial-project"}, "location_name": "tutorial_project"}, "repository_name": "__repository__"}, "pipeline_name": "__ASSET_JOB"}, "has_repository_load_data": false, "mode": null, "parent_run_id": null, "pipeline_code_origin": {"__class__": "PipelinePythonOrigin", "pipeline_name": "__ASSET_JOB", "repository_origin": {"__class__": "RepositoryPythonOrigin", "code_pointer": {"__class__": "ModuleCodePointer", "fn_name": "defs", "module": "tutorial_project", "working_directory": "/home/jag/dev/llactalab/rumbos/gps-duck/tutorial-project"}, "container_context": {}, "container_image": null, "entry_point": ["dagster"], "executable_path": "/home/jag/anaconda3/envs/llacta-rumbos/bin/python3.10"}}, "pipeline_name": "__ASSET_JOB", "pipeline_snapshot_id": "ad06cdf3396cfaebb77877426f5d0d38d6e149c3", "root_run_id": null, "run_config": {}, "run_id": "ed1a0d6a-ea17-477c-9eca-74e1f40d4d81", "solid_selection": null, "solids_to_execute": null, "status": {"__enum__": "PipelineRunStatus.STARTING"}, "step_keys_to_execute": null, "tags": {".dagster/grpc_info": "{\\"host\\": \\"localhost\\", \\"socket\\": \\"/tmp/tmpf5j209eu\\"}"}}', 'node_handle_kwargs': '{"name": "MG91_20230428_artefacto__raw_explore__MG91_artefacto_reloj_20230428_01_explore", "parent": null}', 'instance_ref_dict': '{"__class__": "InstanceRef", "compute_logs_data": {"__class__": "ConfigurableClassData", "class_name": "LocalComputeLogManager", "config_yaml": "base_dir: /home/jag/dev/llactalab/rumbos/gps-duck/tutorial-project/output/storage\\n", "module_name": "dagster.core.storage.local_compute_log_manager"}, "custom_instance_class_data": null, "event_storage_data": {"__class__": "ConfigurableClassData", "class_name": "SqliteEventLogStorage", "config_yaml": "base_dir: /home/jag/dev/llactalab/rumbos/gps-duck/tutorial-project/output/history/runs/\\n", "module_name": "dagster.core.storage.event_log"}, "local_artifact_storage_data": {"__class__": "ConfigurableClassData", "class_name": "LocalArtifactStorage", "config_yaml": "base_dir: /home/jag/dev/llactalab/rumbos/gps-duck/tutorial-project/output\\n", "module_name": "dagster.core.storage.root"}, "run_coordinator_data": {"__class__": "ConfigurableClassData", "class_name": "DefaultRunCoordinator", "config_yaml": "{}\\n", "module_name": "dagster.core.run_coordinator"}, "run_launcher_data": {"__class__": "ConfigurableClassData", "class_name": "DefaultRunLauncher", "config_yaml": "{}\\n", "module_name": "dagster"}, "run_storage_data": {"__class__": "ConfigurableClassData", "class_name": "SqliteRunStorage", "config_yaml": "base_dir: /home/jag/dev/llactalab/rumbos/gps-duck/tutorial-project/output/history/\\n", "module_name": "dagster.core.storage.runs"}, "schedule_storage_data": {"__class__": "ConfigurableClassData", "class_name": "SqliteScheduleStorage", "config_yaml": "base_dir: /home/jag/dev/llactalab/rumbos/gps-duck/tutorial-project/output/schedules\\n", "module_name": "dagster.core.storage.schedules"}, "scheduler_data": {"__class__": "ConfigurableClassData", "class_name": "DagsterDaemonScheduler", "config_yaml": "{}\\n", "module_name": "dagster.core.scheduler"}, "secrets_loader_data": null, "settings": {}, "storage_data": {"__class__": "ConfigurableClassData", "class_name": "DagsterSqliteStorage", "config_yaml": "base_dir: /home/jag/dev/llactalab/rumbos/gps-duck/tutorial-project/output\\n", "module_name": "dagster.core.storage.sqlite_storage"}}', 'step_key': '"MG91_20230428_artefacto__raw_explore__MG91_artefacto_reloj_20230428_01_explore"', 'output_log_path': '"/tmp/tmpp1tuyhss"', 'marshal_dir': '"/tmp/dagstermill/ed1a0d6a-ea17-477c-9eca-74e1f40d4d81/marshal"', 'run_config': '{}'}.items()
    }
)
data = __dm_dagstermill._load_input_parameter('data')
   code                        asset_name  \
0  MG91  MG91_artefacto_reloj_20230428_01   
1  MG91  MG91_artefacto_reloj_20230428_02   
2  MG91    MG91_persona_reloj_20230428_01   
3  MG91    MG91_persona_reloj_20230428_02   
4  MG91  MG91_artefacto_reloj_20230503_01   
5  MG91  MG91_artefacto_reloj_20230503_02   
6  MG91    MG91_persona_reloj_20230503_01   
7  MG91    MG91_persona_reloj_20230503_02   

                              file_name folder_name          owner  \
0  MG91_artefacto_reloj_20230428_01.gpx        MG91  Emilia Acurio   
1  MG91_artefacto_reloj_20230428_02.gpx        MG91  Emilia Acurio   
2    MG91_persona_reloj_20230428_01.gpx        MG91  Emilia Acurio   
3    MG91_persona_reloj_20220428_02.gpx        MG91  Emilia Acurio   
4  MG91_artefacto_reloj_20230503_01.gpx        MG91  Emilia Acurio   
5  MG91_artefacto_reloj_20230503_02.gpx        MG91  Emilia Acurio   
6    MG91_persona_reloj_20230503_01.gpx        MG91  Emilia Acurio   
7    MG91_persona_reloj_20230503_02.gpx        MG91  Emilia Acurio   

                     group      date       type  
0  MG91_20230428_artefacto  20230428  artefacto  
1  MG91_20230428_artefacto  20230428  artefacto  
2    MG91_20230428_persona  20230428    persona  
3    MG91_20230428_persona  20230428    persona  
4  MG91_20230503_artefacto  20230503  artefacto  
5  MG91_20230503_artefacto  20230503  artefacto  
6    MG91_20230503_persona  20230503    persona  
7    MG91_20230503_persona  20230503    persona  
RESULT AssetsDefinition with key ["workdir", "MG91_20230428_artefacto_traj"]
RESULT AssetsDefinition with key ["workdir", "MG91_20230428_persona_traj"]
RESULT AssetsDefinition with key ["workdir", "MG91_20230503_artefacto_traj"]
RESULT AssetsDefinition with key ["workdir", "MG91_20230503_persona_traj"]
Hello job
/home/jag/anaconda3/envs/llacta-rumbos/lib/python3.10/site-packages/dagster/_core/definitions/resolved_asset_deps.py:22: ExperimentalWarning: Asset ["workdir", "MG91_persona_reloj_20230503_02_traj_clean"]'s dependency 'MG91_persona_reloj_20230503_02_traj' was resolved to upstream asset ["workdir", "MG91_persona_reloj_20230503_02_traj"], because the name matches and they're in the same group. This is experimental functionality that may change in a future release. To mute warnings for experimental functionality, invoke warnings.filterwarnings("ignore", category=dagster.ExperimentalWarning) or use one of the other methods described at https://docs.python.org/3/library/warnings.html#describing-warning-filters.
  self._deps_by_assets_def_id = resolve_assets_def_deps(assets_defs, source_assets)
/home/jag/anaconda3/envs/llacta-rumbos/lib/python3.10/site-packages/dagster/_core/definitions/resolved_asset_deps.py:22: ExperimentalWarning: Asset ["workdir", "MG91_artefacto_reloj_20230428_01_traj_clean"]'s dependency 'MG91_artefacto_reloj_20230428_01_traj' was resolved to upstream asset ["workdir", "MG91_artefacto_reloj_20230428_01_traj"], because the name matches and they're in the same group. This is experimental functionality that may change in a future release. To mute warnings for experimental functionality, invoke warnings.filterwarnings("ignore", category=dagster.ExperimentalWarning) or use one of the other methods described at https://docs.python.org/3/library/warnings.html#describing-warning-filters.
  self._deps_by_assets_def_id = resolve_assets_def_deps(assets_defs, source_assets)
2023-06-20 18:51:00 -0500 - dagster - DEBUG - __ASSET_JOB - ed1a0d6a-ea17-477c-9eca-74e1f40d4d81 - 2214827 - RESOURCE_INIT_STARTED - Starting initialization of resources [io_manager, mobilityDb_manager, output_notebook_io_manager].
2023-06-20 18:51:00 -0500 - dagster - DEBUG - __ASSET_JOB - ed1a0d6a-ea17-477c-9eca-74e1f40d4d81 - 2214827 - RESOURCE_INIT_SUCCESS - Finished initialization of resources [io_manager, mobilityDb_manager, output_notebook_io_manager].

Raw data import¶

In [ ]:
print("This dataset contains {} records.\nThe first lines are:".format(len(data)))
data.head()
This dataset contains 8000 records.
The first lines are:
Out[ ]:
lat lon elevation time file_path fila_name track_id codigo id geometry
0 -2.895220 -78.986263 None 2023-04-28 07:30:57+00:00 data/MG91/MG91_artefacto_reloj_20230428_01.gpx MG91_artefacto_reloj_20230428_01.gpx MG91_artefacto_reloj_20230428_01 MG91 1 POINT (-78.98626 -2.89522)
1 -2.895220 -78.986260 None 2023-04-28 07:30:58+00:00 data/MG91/MG91_artefacto_reloj_20230428_01.gpx MG91_artefacto_reloj_20230428_01.gpx MG91_artefacto_reloj_20230428_01 MG91 1 POINT (-78.98626 -2.89522)
2 -2.895220 -78.986260 None 2023-04-28 07:30:59+00:00 data/MG91/MG91_artefacto_reloj_20230428_01.gpx MG91_artefacto_reloj_20230428_01.gpx MG91_artefacto_reloj_20230428_01 MG91 1 POINT (-78.98626 -2.89522)
3 -2.895220 -78.986258 None 2023-04-28 07:31:00+00:00 data/MG91/MG91_artefacto_reloj_20230428_01.gpx MG91_artefacto_reloj_20230428_01.gpx MG91_artefacto_reloj_20230428_01 MG91 1 POINT (-78.98626 -2.89522)
4 -2.895217 -78.986260 None 2023-04-28 07:31:01+00:00 data/MG91/MG91_artefacto_reloj_20230428_01.gpx MG91_artefacto_reloj_20230428_01.gpx MG91_artefacto_reloj_20230428_01 MG91 1 POINT (-78.98626 -2.89522)
In [ ]:
data.columns
Out[ ]:
Index(['lat', 'lon', 'elevation', 'time', 'file_path', 'fila_name', 'track_id',
       'codigo', 'id', 'geometry'],
      dtype='object')
In [ ]:
data.dtypes
Out[ ]:
lat                      float64
lon                      float64
elevation                 object
time         datetime64[ns, UTC]
file_path                 object
fila_name                 object
track_id                  object
codigo                    object
id                         int64
geometry                geometry
dtype: object
In [ ]:
data.crs
Out[ ]:
<Geographic 2D CRS: EPSG:4326>
Name: WGS 84
Axis Info [ellipsoidal]:
- Lat[north]: Geodetic latitude (degree)
- Lon[east]: Geodetic longitude (degree)
Area of Use:
- name: World.
- bounds: (-180.0, -90.0, 180.0, 90.0)
Datum: World Geodetic System 1984 ensemble
- Ellipsoid: WGS 84
- Prime Meridian: Greenwich

Geographic extent¶

In [ ]:
data.hvplot(title='Geographic extent of the dataset', geo=True, tiles='OSM', frame_height=450)
Out[ ]:
In [ ]:
code = data['codigo'].unique()[0]

temp = data.to_crs(CRS(32717))
temp['geometry'] = temp['geometry'].buffer(5)
total_area = temp.dissolve(by='codigo').area 
total_area = total_area[code]/1000
print('The total area covered by the data is: {:,.2f} km2'.format(total_area))
The total area covered by the data is: 63.77 km2

Temporal extent:¶

In [ ]:
print("The dataset covers the time between {} and {}.".format(data.time.min(), data.time.max()))
The dataset covers the time between 2023-04-28 07:30:57+00:00 and 2023-04-28 09:44:16+00:00.
In [ ]:
print("That's {}".format(data.time.max() - data.time.min()))
That's 0 days 02:13:19

Number of registers each 10 minutes¶

Each complete 10m interval should have 600 counts, except first and last

In [ ]:
ig, axs = plt.subplots(figsize=(12, 4))

df = data
df.set_index('time')
df.groupby(pd.Grouper(key='time', axis=0, 
                      freq='10min'))["id"].sum().plot(kind='bar',  ax=axs)
Out[ ]:
<Axes: xlabel='time'>

Sampling intervals¶

As the tracking gps are sensing with a resolution of 1s we expect to have records only in 1 second interval

In [ ]:
t = data.reset_index().time
df = data.assign(delta_t=t.diff().values)
df['delta_t'] = df['delta_t'].dt.total_seconds()
pd.DataFrame(df).hvplot.hist('delta_t', title='Histogram of intervals between consecutive records (in seconds)', bins=60, bin_range=(0, 60))
Out[ ]:

Spped values¶

For example: Does the data contain unattainable speeds?

In [ ]:
traj = mpd.Trajectory(data, traj_id='id', t='time')

traj.add_speed()
max_speed = traj.df.speed.max()
print("The highest computed speed is {:,.2f} m/s ({:,.2f} km/h)".format(max_speed, max_speed*3600/1000))
/home/jag/anaconda3/envs/llacta-rumbos/lib/python3.10/site-packages/movingpandas/trajectory.py:136: TimeZoneWarning: Time zone information dropped from trajectory. All dates and times will use local time. To use UTC or a different time zone, convert and drop time zone information prior to trajectory creation.
  warnings.warn(
The highest computed speed is 65.16 m/s (234.59 km/h)

Spped distribution¶

In [ ]:
pd.DataFrame(traj.df).hvplot.hist('speed', title='Histogram of speeds (in meters per second)', bins=100)
Out[ ]:
In [ ]:
speed = pd.DataFrame(traj.df).sort_values(by='speed', ascending=False)
speed_km_h = speed['speed']*3600/1000
speed_km_h.head(20)
Out[ ]:
time
2023-04-28 09:11:04    234.592493
2023-04-28 09:41:25    125.858600
2023-04-28 09:44:05    101.109475
2023-04-28 09:41:21     93.930480
2023-04-28 09:11:05     86.082492
2023-04-28 08:53:59     82.932706
2023-04-28 08:56:20     82.547829
2023-04-28 07:32:28     51.294701
2023-04-28 09:42:33     48.187384
2023-04-28 09:41:53     47.366488
2023-04-28 09:42:31     47.171383
2023-04-28 09:11:13     44.299663
2023-04-28 09:41:26     39.884652
2023-04-28 07:34:17     39.633788
2023-04-28 09:42:32     35.654988
2023-04-28 08:56:21     35.033008
2023-04-28 08:53:13     34.940074
2023-04-28 07:34:14     34.867773
2023-04-28 09:41:52     31.343546
2023-04-28 08:53:14     30.601808
Name: speed, dtype: float64
In [ ]:
 
In [ ]:
import dagstermill as __dm_dagstermill
__dm_dagstermill._teardown()